## ============================================================================
## 01_manuscript.R
##
## Purpose:   Reproduce all main manuscript figures and tables.
##            Corresponds to Sections 2-4 of the manuscript.
##            Must be run before any SI scripts.
##
## Inputs:    combined_data.rds (via 00_setup.R),
##            asx_results_2019to2024.rds,
##            unis_and_museums.rds
## Outputs:   Figures/fig1.pdf,
##            Figures/fig2.pdf,
##            Figures/fig3.pdf,
##            Figures/fig4.pdf,
##            Tables/table1.tex,
##            Tables/ates_study1_by_sample_unadj.rds (pipeline input for
##            06_si_s2_audience_effects.R)
## ============================================================================

source("00_setup.R")

## ---- Figure 1: Diffusion of land acknowledgments ----------------------------
gpt_asx <-
  read_rds("asx_results_2019to2024.rds") %>%
  mutate(org_type = "Company", country = "Australia") %>%
  rename(org_name = company_name)

uni_dat <-
  read_rds("unis_and_museums.rds") %>%
  mutate(
    year_adopted = case_when(
      year_adopted == "Not applicable" ~ NA,
      .default = year_adopted
    ),
    first_appearance = as.numeric(year_adopted)
  ) %>%
  select(org_name, org_type, country, first_appearance, region)

years <- 2010:2024

uni_panel <-
  uni_dat %>%
  mutate(org_id = row_number()) %>%
  crossing(year = years) %>%
  mutate(land_acknowledgment = case_when(
    is.na(first_appearance) ~ 0,
    year >= first_appearance ~ 1,
    TRUE ~ 0
  )) %>%
  select(year, org_name, org_type, country, land_acknowledgment) %>%
  distinct()

asx_panel <-
  gpt_asx %>%
  select(year, org_name, org_type, country, land_acknowledgment) %>%
  complete(
    org_name,
    year = 2010:2024,
    fill = list(land_acknowledgment = 0)
  ) %>%
  group_by(org_name) %>%
  fill(org_type, country, .direction = "downup") %>%
  ungroup() %>%
  arrange(org_name, year)

la_panel <- bind_rows(asx_panel, uni_panel)

la_summary <-
  la_panel %>%
  mutate(
    org_type = case_when(
      org_type %in% c("Museum", "Gallery") ~ "Museum/Gallery",
      org_type %in% c("Private University", "Public University") ~ "University",
      .default = org_type
    )
  ) %>%
  group_by(year, org_type, country) %>%
  summarise(proportion = mean(land_acknowledgment, na.rm = TRUE),
            .groups = "drop")

country_cols <- c("Australia" = "black", "United States" = "grey50")

linewidth_map <- c("University" = 1.2,
                   "Museum/Gallery" = 1,
                   "Company" = 0.8)

p <-
  ggplot(la_summary,
         aes(year, proportion,
             group = interaction(country, org_type),
             colour = country,
             linetype = org_type,
             linewidth = org_type)) +
  geom_line() +
  scale_linewidth_manual(values = linewidth_map, guide = "none") +
  scale_colour_manual(values = country_cols) +
  scale_linetype_manual(values = c("University" = "solid",
                                   "Museum/Gallery" = "dotdash",
                                   "Company" = "dotted")) +
  scale_x_continuous(limits = c(2009.6, 2024.4),
                     breaks = seq(2010, 2024, 2),
                     expand = c(0.01, 0.01)) +
  scale_y_continuous(labels = scales::percent,
                     limits = c(0, 1.03),
                     expand = c(0, 0)) +
  theme_classic(base_size = 14) +
  labs(title = NULL, x = NULL, y = NULL) +
  theme(legend.position = "none",
        axis.text = element_text(colour = "black"))

label_data <- 
  la_summary %>%
  filter(year == 2024) %>%
  mutate(label = case_when(
    org_type == "University"     & country == "Australia"     ~ "Aus. universities",
    org_type == "Museum/Gallery" & country == "Australia"     ~ "Aus. museums",
    org_type == "Company"        & country == "Australia"     ~ "Aus. companies",
    org_type == "University"     & country == "United States" ~ "U.S. universities",
    org_type == "Museum/Gallery" & country == "United States" ~ "U.S. museums"
  ),
  proportion = case_when(
    label == "Aus. universities"  ~ 1.01,
    label == "Aus. museums"       ~ 0.96,
    label == "U.S. universities"  ~ 0.867,
    label == "U.S. museums"       ~ 0.47,
    label == "Aus. companies"     ~ 0.52
  ),
  x_lab = 2024.15,  
  # nudge each so text does not sit on top of the line
  label_y = case_when(
    label == "Aus. universities"  ~ proportion + 0.02,
    label == "Aus. museums"       ~ proportion - 0.02,
    label == "U.S. universities"  ~ proportion + 0.01,
    label == "U.S. museums"       ~ proportion - 0.03,
    label == "Aus. companies"     ~ proportion + 0.02
  ))

g <-
  p +
  geom_text(data = label_data,
            aes(x = x_lab, y = proportion, label = label, colour = country),
            hjust = 0,
            size = 4,
            show.legend = FALSE) +
  coord_cartesian(clip = "off") +
  theme(plot.margin = margin(5.5, 75, 5.5, 5.5))
g

ggsave(
  filename = paste0(fig_dir, "fig1.pdf"),
  g,
  width = 6.5,
  height = 4.25
)

## ---- Table 1 and Figure 2: Baseline means by sample ------------------------

subset_dat <-
  combined_dat %>%
  filter(Z_land_info == 0) %>%
  mutate(sample = factor(
    sample,
    levels = c("AU", "US"),
    labels = c("Australia", "United States")
  ))

outcome_vars <- c("y_substantive", "y_recall_binary", "y_acknowledge_binary",
                  "y_infoseek_binary", "y_symbolic", "y_guilt", "y_cbr")

sample_means <- map_dfr(outcome_vars, function(y_var) {
  subset_dat %>%
    group_by(sample) %>%
    group_modify(~ tidy(lm_robust(reformulate("1", y_var), data = .x))) %>%
    ungroup() %>%
    filter(term == "(Intercept)") %>%
    mutate(outcome = y_var)
}) %>%
  mutate(outcome = factor(
    outcome,
    levels = c("y_recall_binary", "y_acknowledge_binary", "y_infoseek_binary",
               "y_guilt", "y_cbr", "y_symbolic", "y_substantive"),
    labels = c(
      "Indigenous Awareness",
      "Indigenous Land",
      "Information Seeking",
      "Collective Guilt",
      "Colorblind Racism",
      "Symbolic Reparations",
      "Substantive Reparations"
    )
  ))


## Calculate differences and prep table
est_diff <-
  sample_means %>%
  select(sample, outcome, estimate, std.error, p.value) %>%
  pivot_wider(names_from = sample, values_from = c(estimate, std.error, p.value)) %>%
  mutate(
    diff_estimate  = `estimate_Australia` - `estimate_United States`,
    diff_std_error = sqrt(`std.error_Australia`^2 + `std.error_United States`^2),
    diff_p_value   = 2 * pnorm(abs(diff_estimate) / diff_std_error, lower.tail = FALSE),
    diff_ci_lwr    = diff_estimate - 1.96 * diff_std_error,
    diff_ci_upr    = diff_estimate + 1.96 * diff_std_error
  )

dim_tab <-
  sample_means %>%
  mutate(across(where(is.numeric), \(x) round(x, 3))) %>%
  mutate(entry = table_entry(est = estimate, se = std.error)) %>%
  select(sample, outcome, entry) %>%
  pivot_wider(names_from = sample, values_from = entry) %>%
  left_join(
    est_diff %>%
      mutate(
        diff = table_entry(est = diff_estimate, se = diff_std_error),
        diff_ci = paste0("[", round(diff_ci_lwr, 2), ", ",
                         round(diff_ci_upr, 2), "]"),
        diff_p_value = case_when(
          diff_p_value < 0.001 ~ "< 0.001",
          .default = paste0(round(diff_p_value, 3))
        )
      ) %>%
      select(outcome, diff, diff_ci, diff_p_value),
    by = "outcome"
  ) %>%
  arrange(outcome)

## Export Table 1
dim_tab %>%
  filter(outcome %in% c("Indigenous Awareness", "Indigenous Land",
                        "Information Seeking")) %>%
  mutate(outcome = paste0("\\textit{", outcome, "}")) %>%
  select(outcome, Australia, `United States`, diff, diff_ci, diff_p_value) %>%
  column_to_rownames("outcome") %>%
  xtable() %>%
  print(
    include.rownames = TRUE,
    include.colnames = FALSE,
    only.contents = TRUE,
    type = "latex",
    sanitize.text.function = identity,
    comment = FALSE,
    hline.after = NULL,
    file = paste0(tab_dir, "table1.tex")
  )

## Figure 2
p_adjust <- 0.4

p <-
  sample_means %>%
  filter(
    outcome %in% c(
      "Collective Guilt",
      "Colorblind Racism",
      "Symbolic Reparations",
      "Substantive Reparations"
    )
  ) %>%
  ggplot(., aes(x = estimate, y = outcome, group = sample, color = sample,
                fill = sample)) +
  geom_vline(xintercept = 4, color = "black", linetype = "solid",
             linewidth = 0.2) +
  geom_errorbar(
    aes(xmin = conf.low, xmax = conf.high),
    width = 0,
    position = position_dodgev(height = p_adjust),
    linewidth = 1.5,
    orientation = "y"
  ) +
  geom_point(
    position = position_dodgev(height = p_adjust),
    size = 4,
    pch = 22,
    color = "white"
  ) +
  facet_col(~ outcome, space = "free", scales = "free_y") +
  scale_x_continuous(limits = c(3.4, 5.1), breaks = seq(3.5, 5, by = 0.5)) +
  scale_color_manual("", values = c("black", "darkgrey")) +
  scale_fill_manual("", values = c("black", "darkgrey")) +
  theme_tufte(base_size = 14) +
  labs(subtitle = "", x = "Average score (1 - 7 scale)", y = "") +
  theme(
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(linewidth = 0.25),
    axis.line.x = element_line(linewidth = 0.25, color = "black"),
    axis.title.x = element_text(size = 14),
    axis.text.x = element_text(size = 12, color = "black"),
    axis.text.y = element_blank(),
    strip.text = element_text(size = 14, hjust = 0.5),
    plot.margin = unit(c(t = -1, r = 0.1, b = 0, l = -1), "lines"),
    legend.position = "none",
    legend.margin = margin(0, 0, 0, 0),
    legend.box.spacing = unit(0, "pt"),
    legend.box.margin = margin(0, 0, 0, 0)
  )

g <-
  p +
  geom_label(
    data = p$data %>%
      filter(outcome == "Collective Guilt") %>%
      mutate(estimate = case_when(
        sample == "United States" ~ 4.8,
        sample == "Australia"     ~ 4.75
      )),
    aes(label = sample),
    position = position_dodgev(height = 1),
    color = "white",
    size = 3.5,
    show.legend = FALSE
  )

ggsave(
  filename = paste0(fig_dir, "fig2.pdf"),
  g,
  width = 4,
  height = 4
)

## ---- In-text estimates: Section 2.1 ----------------------------------------

## 22% of untreated respondents that expressed interest in learning more also
## clicked the trackable link
combined_dat %>%
  filter(y_infoseek_binary == 1) %>%
  group_by(Z_land_info) %>% 
  summarise(click = mean(y_infoseek_click, na.rm = TRUE))

## NB: rates are comparable when split by sample:
combined_dat %>%
  filter(y_infoseek_binary == 1) %>%
  group_by(Z_land_info, sample) %>% 
  summarise(click = mean(y_infoseek_click, na.rm = TRUE))

## Table 1
cat("Table 1 (Section 2.1):\n")
print(dim_tab %>%
        filter(outcome %in% c("Indigenous Awareness", "Indigenous Land",
                              "Information Seeking")))

## Figure 2 cross-national differences
cat("Figure 2 cross-national differences (Section 2.1):\n")
print(est_diff %>%
        filter(outcome %in% c("Collective Guilt", "Colorblind Racism",
                              "Symbolic Reparations", "Substantive Reparations")) %>%
        mutate(
          diff  = round(diff_estimate, 2),
          se    = round(diff_std_error, 2),
          ci_lwr = round(diff_ci_lwr, 2),
          ci_upr = round(diff_ci_upr, 2),
          pval  = case_when(
            diff_p_value < 0.001 ~ "< 0.001",
            .default = as.character(round(diff_p_value, 3))
          )
        ) %>%
        select(outcome, diff, se, ci_lwr, ci_upr, pval))

## ---- Figure 3: Study 1 average treatment effects ----------------------------

est_df <-
  combined_dat %>%
  select(
    sample,
    y_guilt_idx_s,
    y_cbr_idx_s,
    y_substantive_idx_s,
    y_symbolic_idx_s,
    y_voice_vote_s,
    y_infoseek_s,
    y_acknowledge_s,
    y_recall_binary_s,
    Z_land_info
  ) %>%
  pivot_longer(cols = starts_with("y_"),
               names_to = "outcome_group",
               values_to = "y") %>%
  mutate(
    Z = ifelse(Z_land_info == 1, "Treatment", "Control"),
    outcome_group = case_when(
      outcome_group == "y_guilt_idx_s"        ~ "Collective Guilt",
      outcome_group == "y_cbr_idx_s"          ~ "Colorblind Racism",
      outcome_group == "y_symbolic_idx_s"     ~ "Symbolic Reparations",
      outcome_group == "y_substantive_idx_s"  ~ "Substantive Reparations",
      outcome_group == "y_voice_vote_s"       ~ "Voting for Referendum",
      outcome_group == "y_infoseek_s"         ~ "Information Seeking",
      outcome_group == "y_acknowledge_s"      ~ "Indigenous Land",
      outcome_group == "y_recall_binary_s"    ~ "Indigenous Awareness"
    )
  ) %>%
  filter(!is.na(Z))

tmp <-
  est_df %>%
  filter(outcome_group == "Voting for Referendum", sample == "AU") %>%
  group_by(outcome_group, sample) %>%
  group_modify(~ tidy(lm_robust(y ~ Z, data = .x))) %>%
  ungroup() %>%
  filter(term != "(Intercept)")

summary_df <-
  est_df %>%
  filter(outcome_group != "Voting for Referendum") %>%
  group_by(outcome_group, sample) %>%
  group_modify(~ tidy(lm_robust(y ~ Z, data = .x))) %>%
  ungroup() %>%
  filter(term == "ZTreatment") %>%
  bind_rows(tmp) %>%
  mutate(
    outcome_group = factor(
      outcome_group,
      levels = c(
        "Voting for Referendum",
        "Substantive Reparations",
        "Symbolic Reparations",
        "Colorblind Racism",
        "Collective Guilt",
        "Information Seeking",
        "Indigenous Land",
        "Indigenous Awareness"
      ),
      labels = c(
        "Voice Referendum",
        "Substantive Reparations",
        "Symbolic Reparations",
        "Colorblind Racism",
        "Collective Guilt",
        "Information Seeking",
        "Indigenous Land",
        "Indigenous Awareness"
      )
    ),
    sample = factor(
      sample,
      levels = c("AU", "US"),
      labels = c("Australia", "United States")
    )
  ) %>%
  ungroup()

## Pipeline dependency for 06_si_s2_audience_effects.R
write_rds(summary_df, "ates_study1_by_sample_unadj.rds")

## For each experiment, the CIs and p-values are adjusted for multiple 
## comparisons across outcome measures (7 outcomes AU, 6 US).
summary_df_unadj <-
  summary_df %>%
  filter(outcome_group != "Indigenous Awareness") %>%
  mutate(
    ci.lwr95_adj = case_when(
      sample == "Australia"     ~ estimate - qnorm(1 - (0.05 / 7) / 2) * std.error,
      sample == "United States" ~ estimate - qnorm(1 - (0.05 / 6) / 2) * std.error
    ),
    ci.upr95_adj = case_when(
      sample == "Australia"     ~ estimate + qnorm(1 - (0.05 / 7) / 2) * std.error,
      sample == "United States" ~ estimate + qnorm(1 - (0.05 / 6) / 2) * std.error
    ),
    ci.lwr90_adj = case_when(
      sample == "Australia"     ~ estimate - qnorm(1 - (0.10 / 7) / 2) * std.error,
      sample == "United States" ~ estimate - qnorm(1 - (0.10 / 6) / 2) * std.error
    ),
    ci.upr90_adj = case_when(
      sample == "Australia"     ~ estimate + qnorm(1 - (0.10 / 7) / 2) * std.error,
      sample == "United States" ~ estimate + qnorm(1 - (0.10 / 6) / 2) * std.error
    )
  ) %>%
  group_by(sample) %>%
  mutate(pval_adj = p.adjust(p.value, method = "bonferroni")) %>%
  ungroup() %>%
  select(
    outcome_group, sample, estimate, std.error, p.value,
    ci.lwr95_adj, ci.upr95_adj, pval_adj,
    ci.lwr90_adj, ci.upr90_adj
  ) %>%
  arrange(outcome_group)

p <-
  summary_df_unadj %>%
  mutate(outcome_group = fct_rev(outcome_group)) %>%
  ggplot(data = ., aes(x = estimate, y = outcome_group, group = sample,
                       color = sample, fill = sample)) +
  geom_vline(xintercept = 0,    color = "black", linetype = "solid",  linewidth = 0.2) +
  geom_vline(xintercept = -0.2, color = "black", linetype = "dotted", linewidth = 0.2) +
  geom_vline(xintercept =  0.2, color = "black", linetype = "dotted", linewidth = 0.2) +
  geom_errorbar(
    aes(xmin = ci.lwr95_adj, xmax = ci.upr95_adj),
    width = 0,
    position = position_dodgev(height = 0.7),
    linewidth = .75,
    orientation = "y"
  ) +
  geom_errorbar(
    aes(xmin = ci.lwr90_adj, xmax = ci.upr90_adj),
    width = 0,
    position = position_dodgev(height = 0.7),
    linewidth = 1.5,
    orientation = "y"
  ) +
  geom_point(
    position = position_dodgev(height = 0.7),
    size = 4,
    pch = 22,
    color = "white"
  ) +
  facet_col(~ outcome_group, space = "free", scales = "free_y") +
  scale_x_continuous(limits = c(-0.32, 0.32),
                     breaks = c(-0.30, -0.20, -0.10, 0, 0.10, 0.20, 0.30)) +
  scale_color_manual("", values = c("black", "darkgrey")) +
  scale_fill_manual("", values = c("black", "darkgrey")) +
  theme_tufte(base_size = 14) +
  labs(subtitle = "", x = "Average treatment effect (in standard units)", y = "") +
  theme(
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(linewidth = 0.25),
    axis.line.x = element_line(linewidth = 0.25),
    axis.text.x = element_text(size = 12, color = "black"),
    axis.text.y = element_blank(),
    strip.text = element_text(size = 14, hjust = 0.5),
    plot.margin = unit(c(t = -1, r = 0, b = 0, l = -1), "lines"),
    legend.position = "none",
    legend.margin = margin(0, 0, 0, 0),
    legend.box.spacing = unit(0, "pt"),
    legend.box.margin = margin(0, 0, 0, 0)
  )

g <-
  p +
  geom_label(
    data = p$data %>%
      filter(outcome_group == "Indigenous Land") %>%
      mutate(estimate = case_when(
        sample == "United States" ~ 0.28,
        sample == "Australia"     ~ 0.26
      )),
    aes(label = sample),
    position = position_dodgev(height = 1.25),
    color = "white",
    size = 3.5,
    show.legend = FALSE
  )

ggsave(
  filename = paste0(fig_dir, "fig3.pdf"),
  g,
  width = 4.5,
  height = 6.5
)

## ---- In-text estimates: Section 3.1 ----------------------------------------

## Recall proportions by treatment arm and sample
recall_pcts <-
  combined_dat %>%
  filter(!is.na(Z_land_info), !is.na(y_recall_binary)) %>%
  group_by(sample, Z_land_info) %>%
  summarise(pct = mean(y_recall_binary), .groups = "drop")

cat("Recall proportions by treatment arm and sample (Section 3.1):\n")
print(recall_pcts)

## First-stage: ATE on Indigenous Awareness (in percentage points)
recall_ate <-
  combined_dat %>%
  group_by(sample) %>%
  group_modify(~ tidy(lm_robust(y_recall_binary ~ Z_land_info, data = .x))) %>%
  ungroup() %>%
  filter(term == "Z_land_info") 

cat("First-stage ATE on Indigenous Awareness in PP (Section 3.1):\n")
print(recall_ate %>% select(sample, estimate, std.error, conf.low, conf.high, p.value))

## ATEs on downstream outcomes with Bonferroni-adjusted CIs
cat("ATEs on downstream outcomes (Section 3.1):\n")
print(summary_df_unadj %>%
        select(outcome_group, sample, estimate, std.error,
               ci.lwr95_adj, ci.upr95_adj, ci.lwr90_adj, ci.upr90_adj, pval_adj))

## ---- Figure 4: Study 2 average treatment effects ----------------------------

mine_dat <-
  combined_dat %>%
  select(
    sample,
    admin_response_id,
    y_trust_m_n,
    y_reputation_m_n,
    y_effective_m_n,
    y_sincerity_m_n,
    Z_vignette_a,
    Z_vignette_order
  ) %>%
  group_by(sample) %>%
  mutate(
    y_mine_aidx = c(
      y_trust_m_n + y_reputation_m_n + y_effective_m_n + y_sincerity_m_n
    ),
    y_mine_aidx_s = glass_delta(Y = y_mine_aidx, Z = Z_vignette_a,
                                reference = "Control"),
    Z_vig_type = "Mining"
  ) %>%
  rename(y_aidx   = y_mine_aidx,
         y_aidx_s = y_mine_aidx_s,
         Z        = Z_vignette_a,
         Z_vig_order = Z_vignette_order,
         y_trust_n      = y_trust_m_n,
         y_reputation_n = y_reputation_m_n,
         y_effective_n  = y_effective_m_n,
         y_sincerity_n  = y_sincerity_m_n)

ent_dat <-
  combined_dat %>%
  select(
    sample,
    admin_response_id,
    y_trust_e_n,
    y_reputation_e_n,
    y_effective_e_n,
    y_sincerity_e_n,
    Z_vignette_b,
    Z_vignette_order
  ) %>%
  group_by(sample) %>%
  mutate(
    y_ent_aidx = c(
      y_trust_e_n + y_reputation_e_n + y_effective_e_n + y_sincerity_e_n
    ),
    y_ent_aidx_s = glass_delta(Y = y_ent_aidx, Z = Z_vignette_b,
                               reference = "Control"),
    Z_vig_type = "Entertainment"
  ) %>%
  rename(y_aidx   = y_ent_aidx,
         y_aidx_s = y_ent_aidx_s,
         Z        = Z_vignette_b,
         Z_vig_order = Z_vignette_order,
         y_trust_n      = y_trust_e_n,
         y_reputation_n = y_reputation_e_n,
         y_effective_n  = y_effective_e_n,
         y_sincerity_n  = y_sincerity_e_n)

stack_dat <- bind_rows(mine_dat, ent_dat)

## Pooled ATEs by sample
est_pool <-
  stack_dat %>%
  group_by(sample) %>%
  group_modify(~ tidy(
    lm_lin(
      y_aidx_s ~ Z,
      covariates = ~ Z_vig_order + Z_vig_type,
      data = .x,
      clusters = admin_response_id
    )
  )) %>%
  ungroup() %>%
  mutate(estimator = "Pooled")

vig_summary_unadj <-
  est_pool %>%
  filter(term %in% c("ZSubstantive", "ZSymbolic")) %>%
  mutate(
    conf.low90  = estimate - qnorm(1 - (0.10) / 2) * std.error,
    conf.high90 = estimate + qnorm(1 - (0.10) / 2) * std.error,
    sample = factor(
      sample,
      levels = c("AU", "US"),
      labels = c("Australia", "United States")
    ),
    Z = factor(
      term,
      levels = rev(c("ZSymbolic", "ZSubstantive")),
      labels = rev(c("Symbolic", "Substantive"))
    )
  )

pheight <- 0.4

p <-
  vig_summary_unadj %>%
  ggplot(data = ., aes(x = estimate, y = Z, fill = sample, color = sample,
                       group = sample)) +
  geom_vline(xintercept = 0,    color = "black", linetype = "solid",  linewidth = 0.2) +
  geom_vline(xintercept = -0.2, color = "black", linetype = "dotted", linewidth = 0.2) +
  geom_vline(xintercept =  0.2, color = "black", linetype = "dotted", linewidth = 0.2) +
  geom_errorbar(
    aes(xmin = conf.low, xmax = conf.high),
    width = 0,
    position = position_dodgev(height = pheight),
    linewidth = .75,
    show.legend = TRUE,
    orientation = "y"
  ) +
  geom_errorbar(
    aes(xmin = conf.low90, xmax = conf.high90),
    width = 0,
    position = position_dodgev(height = pheight),
    linewidth = 1.5,
    show.legend = TRUE,
    orientation = "y"
  ) +
  geom_point(
    position = position_dodgev(height = pheight),
    shape = 22,
    size = 4,
    color = "white"
  ) +
  scale_x_continuous(limits = c(-0.22, 0.62),
                     breaks = c(-0.2, 0, 0.20, 0.4, 0.6)) +
  scale_color_manual("", values = c("black", "darkgrey")) +
  scale_fill_manual("", values = c("black", "darkgrey")) +
  theme_tufte(base_size = 14) +
  theme(
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_line(linewidth = 0.25),
    axis.line.x = element_line(linewidth = 0.25),
    axis.text.y = element_text(size = 14, color = "black"),
    axis.text.x = element_text(size = 12, color = "black"),
    plot.margin = unit(c(t = -1, r = 0, b = 0, l = -1), "lines"),
    legend.position = "none",
    legend.margin = margin(0, 0, 0, 0),
    legend.box.spacing = unit(0, "pt"),
    legend.box.margin = margin(0, 0, 0, 0)
  ) +
  labs(subtitle = "", x = "Average treatment effect (in standard units)", y = "")

g <-
  p +
  geom_label(
    data = p$data %>%
      filter(Z == "Symbolic") %>%
      mutate(estimate = case_when(
        sample == "United States" ~ 0.42,
        sample == "Australia"     ~ 0.40
      )),
    aes(label = sample),
    position = position_dodgev(height = pheight),
    color = "white",
    size = 3.5,
    show.legend = FALSE
  )

ggsave(
  filename = paste0(fig_dir, "fig4.pdf"),
  g,
  width = 5,
  height = 3
)

## ---- In-text estimates: Section 4.1 ----------------------------------------

## Symbolic and Substantive ATEs (Pooled)
cat("Figure 4 ATEs: Pooled by sample (Section 4.1):\n")
print(vig_summary_unadj %>%
        select(sample, Z, estimate, std.error,
               conf.low, conf.high, conf.low90, conf.high90, p.value))

## Substantive vs. Symbolic contrasts
sub_vs_sym <-
  est_pool %>%
  filter(term %in% c("ZSubstantive", "ZSymbolic")) %>%
  select(sample, term, estimate, std.error) %>%
  pivot_wider(names_from = term,
              values_from = c(estimate, std.error)) %>%
  mutate(
    estimate    = estimate_ZSubstantive - estimate_ZSymbolic,
    std.error   = sqrt(std.error_ZSubstantive^2 + std.error_ZSymbolic^2),
    conf.low    = estimate - qnorm(1 - (0.05) / 2) * std.error,
    conf.high   = estimate + qnorm(1 - (0.05) / 2) * std.error,
    conf.low90  = estimate - qnorm(1 - (0.10) / 2) * std.error,
    conf.high90 = estimate + qnorm(1 - (0.10) / 2) * std.error,
    p.value     = 2 * pnorm(abs(estimate / std.error), lower.tail = FALSE),
    sample = factor(sample, levels = c("AU", "US"),
                    labels = c("Australia", "United States"))
  ) %>%
  select(sample, estimate, std.error,
         conf.low, conf.high, conf.low90, conf.high90, p.value)

cat("Substantive vs. Symbolic contrast (Section 4.1):\n")
print(sub_vs_sym %>%
        select(sample, estimate, std.error,
               conf.low, conf.high, conf.low90, conf.high90, p.value))

## ---- Session Info -----------------------------------------------------------
sessionInfo()
